#if (!require("remotes")) {
#  install.packages("remotes")
#}
#remotes::install_github("MathiasHarrer/dmetar")

# load packages
packages <- c("zcurve", "dmetar", "tidyverse", "sandwich", 
                         "lmtest", "Hmisc", "metafor")

# Check if each package is installed, and install if not
for (pkg in packages) {
  if (!requireNamespace(pkg, quietly = TRUE)) {
    install.packages(pkg, dependencies = TRUE)
  }
}

# Load the installed packages
library(zcurve)
library(dmetar)
library(tidyverse)
library(sandwich)
library(lmtest)
library(Hmisc)
library(metafor)

# load data
load("curve.RData")

#data wrangling
curve <- curve %>% 
  drop_na(seTE) %>% 
  filter(seTE != "NA")

curve$TE <- as.numeric(curve$TE)
curve$seTE <- as.numeric(curve$seTE)

curve$studlab <- 1:1095

curve$Z <- curve$TE / curve$seTE
curve$pureZ <- abs(curve$Z)

# create data frame containing only statistically significant z-statistics
curve2 <- curve %>% 
  filter(pureZ >= 1.96)

# create data frames for discrepancy-free z-statistics
curveclean <- curve[-which(curve$pureZ>=1.96 & curve$OriginalP == ">=0.05"),]
curveclean2 <- curveclean %>% 
  filter(Paper != "Curtice 2021" & Paper != "Frey 2022" & Paper != "Heap et al. 2021" &
           Paper != "Kikuta & Uesugi 2023" & Paper != "Larsen et al. 2020" & 
           Paper != "Merler 2021" & Paper != "Nussio 2020" & Paper != "Thompson 2022")


# zcurve
# estimate the z-curve
set.seed(321)
zc <- zcurve(curve$Z, method = "EM", bootstrap = 100)
# plot the z-curve
plot(zc, annotation = TRUE, CI = TRUE, extrapolate = TRUE, cex=1.8, cex.axis=1.8, cex.lab=1.5)
# print estimates
ODR(zc, round.coef = 3)
summary.zcurve(zc, round.coef = 3)
file_drawer_ration(zc, round.coef = 3)


## for terrorist attacks
tercurve <- curve %>% 
  filter(Event == "terrorist attack")

# estimate the z-curve
set.seed(321)
zc2 <- zcurve(tercurve$Z, method = "EM", bootstrap = 100)
# plot the z-curve
plot(zc2, annotation = TRUE, CI = TRUE, extrapolate = TRUE, cex=1.8, cex.axis=1.8, cex.lab=1.5)


## for electoral results
elecurve <- curve %>% 
  filter(Event == "election result")

# estimate the z-curve
set.seed(321)
zc3 <- zcurve(elecurve$Z, method = "EM", bootstrap = 100)
# plot the z-curve
plot(zc3, annotation = TRUE, CI = TRUE, extrapolate = TRUE, cex=1.8, cex.axis=1.8, cex.lab=1.5)


## for police violence
polcurve <- curve %>% 
  filter(Event == "police violence")

# estimate the z-curve
set.seed(321)
zc4 <- zcurve(polcurve$Z, method = "EM", bootstrap = 100)
# plot the z-curve
plot(zc4, annotation = TRUE, CI = TRUE, extrapolate = TRUE, cex=1.8, cex.axis=1.8, cex.lab=1.5)



# compare N.Obs below significance threshold to N.Obs above it
belowZ <- curve$pureZ >= 1.86 & curve$pureZ <= 1.95
count_below <- sum(belowZ)
print(count_below)

aboveZ <- curve$pureZ >= 1.96 & curve$pureZ <= 2.05
count_above <- sum(aboveZ)
print(count_above)


# violin plot of statistically significant Zs per discipline
# factorize and order X
desired_order <- c("Sociology", "Psychology", "Political Science", "Criminology", "Economics")
curve2$Discipline <- factor(curve2$Discipline, levels = desired_order)

# function for text statistics
get_box_stats <- function(y, upper_limit = max(curve2$pureZ) * 1.15) {
  return(data.frame(
    y = 0.95 * upper_limit,
    label = paste(
      "Mean =", round(mean(y), 2), "\n",
      "Median =", round(median(y), 2), "\n"
    )
  ))
}

get_box_stats <- function(y, upper_limit = max(curve2$pureZ) * 1.15) {
  return(data.frame(
    y = 0.95 * upper_limit,
    label = paste(
      "Mean =", round(mean(y), 2), "\n",
      "Median =", round(median(y), 2), "\n",
      "Max =", round(max(y), 2)
    )
  ))
}

# create the violin plot
ggplot(curve2, aes(x = Discipline, y = pureZ)) +
  geom_violin(size = 1) +
  theme_minimal(base_size = 30) +
  stat_summary(fun.data = get_box_stats, geom = "text", hjust = 0.5, vjust = 0.6, size = 5) +
  stat_summary(fun = "median", geom = "crossbar", width = 0.1, 
               size = 0.5, color = "black", position = position_dodge(width = 0.75)) + 
  stat_summary(fun.y = function(x) quantile(x, 0.25), geom = "point", 
               size = 1.5, color = "firebrick3", position = position_dodge(width = 0.75), 
               shape = 16) + 
  stat_summary(fun.y = function(x) quantile(x, 0.75), geom = "point", 
               size = 1.5, color = "forestgreen", position = position_dodge(width = 0.75), 
               shape = 16) +
  scale_y_continuous(breaks = seq(1, max(curve2$pureZ), by = 1)) + 
  labs(title = "",
       x = "",
       y = "Z") +
  theme(axis.title.y = element_text(angle = 0, vjust = 0.5))


# discipline-specific z-curves
## for political science
pscurve <- curve %>% 
  filter(Discipline == "Political Science")

# estimate the z-curve
set.seed(321)
zcps <- zcurve(pscurve$Z, method = "EM", bootstrap = 100)
# plot the z-curve
plot(zcps, annotation = TRUE, CI = TRUE, extrapolate = TRUE, cex=1.8, cex.axis=1.8, cex.lab=1.5)


## for economics
ecocurve <- curve %>% 
  filter(Discipline == "Economics")

# estimate the z-curve
set.seed(321)
zceco <- zcurve(ecocurve$Z, method = "EM", bootstrap = 100)
# plot the z-curve
plot(zceco, annotation = TRUE, CI = TRUE, extrapolate = TRUE, cex=1.8, cex.axis=1.8, cex.lab=1.5)




# alternative z-curves

## sample clean from all test statistics with discrepancies 
# estimate the z-curve
set.seed(321)
zclean <- zcurve(curveclean$Z, method = "EM", bootstrap = 100)
# plot the z-curve
plot(zclean, annotation = TRUE, CI = TRUE, extrapolate = TRUE, cex=1.8, cex.axis=1.8, cex.lab=1.5)

## sample clean from all studies that contain test statistics with discrepancies 
# estimate the z-curve
set.seed(321)
zclean2 <- zcurve(curveclean2$Z, method = "EM", bootstrap = 100)
# plot the z-curve
plot(zclean2, annotation = TRUE, CI = TRUE, extrapolate = TRUE, cex=1.8, cex.axis=1.8, cex.lab=1.5)


## estimate clustered z-curves
Z <- as.character(curve$pureZ)
observations <- strsplit(Z, " ")
Z <- paste("z =", observations, sep = " ")

# clustered by article
id <- as.character(curve$Paper)

clustercurve <- zcurve_data(Z, id = id, rounded = FALSE)

set.seed(321)
w <- zcurve_clustered(clustercurve, method = "w", bootstrap = 100)
plot(w, annotation = TRUE, CI = TRUE, extrapolate = TRUE, cex=1.8, cex.axis=1.8, cex.lab=1.5)

set.seed(321)
b <- zcurve_clustered(clustercurve, method = "b", bootstrap = 100)
plot(b, annotation = TRUE, CI = TRUE, extrapolate = TRUE, cex=1.8, cex.axis=1.8, cex.lab=1.5)


# clustered by event type
id2 <- as.character(curve$Event)

clustercurve2 <- zcurve_data(Z, id = id2, rounded = FALSE)

set.seed(321)
w2 <- zcurve_clustered(clustercurve2, method = "w", bootstrap = 100)
plot(w2, annotation = TRUE, CI = TRUE, extrapolate = TRUE, cex=1.8, cex.axis=1.8, cex.lab=1.5)

set.seed(321)
b2 <- zcurve_clustered(clustercurve2, method = "b", bootstrap = 100)
plot(b2, annotation = TRUE, CI = TRUE, extrapolate = TRUE, cex=1.8, cex.axis=1.8, cex.lab=1.5)


## pcurve
pcurve(curve)




## funnel plots
load("teroutcomes.RData")

cutcurve <- teroutcomes %>% 
  filter(seTE < 0.5) %>% 
  filter(Outcome == "Negative Attitudes toward Immigrants")
funnel(rma(yi = TE, sei = seTE, data = cutcurve),col=ifelse(cutcurve$pureZ > 1.96, "darkblue", "red"), 
       xlab = "ITT")

cutcurve2 <- teroutcomes %>% 
  filter(seTE < 0.5) %>% 
  filter(Outcome == "Positive Emotions")
funnel(rma(yi = TE, sei = seTE, data = cutcurve2),col=ifelse(cutcurve2$pureZ > 1.96, "darkblue", "red"), 
       xlab = "ITT")





## caliper tests

# 2.5% interval
threshold <- 1.96
interval_size <- 0.049

data <- curve
data$z <- data$pureZ

# Define intervals
lower_interval <- (data$z >= (threshold - interval_size)) & (data$z < threshold)
upper_interval <- (data$z > threshold) & (data$z <= (threshold + interval_size))

# Count observations in each interval
count_below <- sum(lower_interval)
count_above <- sum(upper_interval)

# Print counts
cat("Count below threshold:", count_below, "\n")
cat("Count above threshold:", count_above, "\n")

# Perform binomial test
binom_test <- binom.test(count_above, count_above + count_below, p = 0.5, alternative = "greater")
print(binom_test)



# 5% interval
threshold <- 1.96
interval_size <- 0.098

data <- curve
data$z <- data$pureZ

# Define intervals
lower_interval <- (data$z >= (threshold - interval_size)) & (data$z < threshold)
upper_interval <- (data$z > threshold) & (data$z <= (threshold + interval_size))

# Count observations in each interval
count_below <- sum(lower_interval)
count_above <- sum(upper_interval)

# Print counts
cat("Count below threshold:", count_below, "\n")
cat("Count above threshold:", count_above, "\n")

# Perform binomial test
binom_test <- binom.test(count_above, count_above + count_below, p = 0.5, alternative = "greater")
print(binom_test)


# 10% interval
threshold <- 1.96
interval_size <- 0.196

data <- curve
data$z <- data$pureZ

# Define intervals
lower_interval <- (data$z >= (threshold - interval_size)) & (data$z < threshold)
upper_interval <- (data$z > threshold) & (data$z <= (threshold + interval_size))

# Count observations in each interval
count_below <- sum(lower_interval)
count_above <- sum(upper_interval)

# Print counts
cat("Count below threshold:", count_below, "\n")
cat("Count above threshold:", count_above, "\n")

# Perform binomial test
binom_test <- binom.test(count_above, count_above + count_below, p = 0.5, alternative = "greater")
print(binom_test)


# 15% interval
threshold <- 1.96
interval_size <- 0.294

data <- curve
data$z <- data$pureZ

# Define intervals
lower_interval <- (data$z >= (threshold - interval_size)) & (data$z < threshold)
upper_interval <- (data$z > threshold) & (data$z <= (threshold + interval_size))

# Count observations in each interval
count_below <- sum(lower_interval)
count_above <- sum(upper_interval)

# Print counts
cat("Count below threshold:", count_below, "\n")
cat("Count above threshold:", count_above, "\n")

# Perform binomial test
binom_test <- binom.test(count_above, count_above + count_below, p = 0.5, alternative = "greater")
print(binom_test)


